# Packages this script relies on. library() stops with an error as soon as a
# package is missing, whereas require() only returns FALSE and lets the script
# fail later with a less helpful "could not find function" message.
libs <- c("bench", "tidyverse", "yaml", "rvest")
for (lib in libs) {
  library(lib, character.only = TRUE)
}

# Settings read from config.yaml (path is relative to the working directory).
config <- read_yaml("config.yaml")

# Base layer: an empty ggplot object.
g <- ggplot()
# Histogram of displ.
# The bin size is chosen automatically; ggplot2 reports:
#   `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mpg %>%
  ggplot(aes(x = displ)) +
  geom_histogram()
# Density distribution of displ.
# Drawing it with geom_density() does not look clean, so use a line layer
# with stat = "density" instead.
ggplot(mpg) +
  geom_line(mapping = aes(x = displ), stat = "density")

# To colour by group, convert the variable to a factor so that it is treated
# as a discrete variable.
# Scatter plot of cty against displ, grouped and coloured by cylinder count.
# cyl is turned into an ordered factor so the colour scale is discrete;
# `ordered` is spelled out in full rather than relying on partial matching
# of `order`.
mpg %>%
  mutate(cyl_fct = factor(cyl, levels = 4:8, ordered = TRUE)) %>%
  ggplot(
    mapping = aes(x = displ, y = cty, group = cyl_fct, colour = cyl_fct)
  ) +
  geom_point()

# Grouping also makes it possible to draw a fitted line per group.
# Same grouped scatter plot, with a linear fit (method = "lm") per group.
# `ordered` is spelled out in full rather than relying on partial matching
# of `order`.
mpg %>%
  mutate(cyl_fct = factor(cyl, levels = 4:8, ordered = TRUE)) %>%
  ggplot(
    mapping = aes(x = displ, y = cty, group = cyl_fct, colour = cyl_fct)
  ) +
  geom_point() +
  geom_smooth(method = "lm")

# Plots like the following can be described as well.
# Scatter plot with custom point colour/shape/size and a dashed linear fit
# drawn without its confidence band (se = FALSE).
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point(colour = "chocolate", shape = 35, size = 10) +
  geom_smooth(method = "lm", linetype = "dashed", se = FALSE)

# To add a few values supplied as plain vectors rather than as a dataset,
# use annotate().
# Extra points given as plain vectors (they are not rows of mpg).
add_x <- c(2.5, 3, 3.5)
add_y <- c(25, 27.5, 30)

# Overlay the extra points in red and add two text labels via annotate().
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point() +
  annotate(geom = "point", x = add_x, y = add_y, colour = "red") +
  annotate(
    geom = "text",
    x = c(5, 5),
    y = c(30, 25),
    label = c("要チェック", "赤色のデータを追加")
  )

# The case where the x axis is a discrete variable.
# Mean cty per vehicle class, computed up front and drawn as-is.
mean_cty <-
  mpg %>%
  group_by(class) %>%
  summarise(cty = mean(cty))
ggplot(mean_cty, aes(class, cty)) +
  geom_bar(stat = "identity")

# Same chart, but letting the summary stat compute the mean from the raw data.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(mpg, aes(class, cty)) +
  geom_bar(stat = "summary", fun = "mean")

# Inspecting summary statistics. User-defined functions appear to work too.
# 10th / 90th percentile helpers. `probs` is spelled out in full rather than
# relying on partial matching of `prob` inside quantile().
q10 <- partial(quantile, probs = .1)
q90 <- partial(quantile, probs = .9)

# Two point-range summaries of cty per class. `fun`, `fun.max` and `fun.min`
# replace the deprecated `fun.y`, `fun.ymax` and `fun.ymin` arguments
# (ggplot2 >= 3.3.0).
mpg %>%
  ggplot(mapping = aes(x = class, y = cty)) +
  # Default colour: mean with the min-max range.
  stat_summary(
    geom = "pointrange",
    fun = "mean",
    fun.max = "max",
    fun.min = "min"
  ) +
  # Red: median with the 10th-90th percentile range. The helpers are passed
  # as function objects so no lookup by name is needed.
  stat_summary(
    geom = "pointrange",
    fun = "median",
    fun.max = q90,
    fun.min = q10,
    colour = "red"
  )

# A violin plot can carry information in its area. The plot below also shows
# that the points end up drawn on top of each other.
# Violin plot of cty per class (scale = "count") with the raw observations
# overlaid as points coloured by class; the colour legend is suppressed.
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  geom_violin(scale = "count") +
  geom_point(mapping = aes(colour = class), show.legend = FALSE)

# Use jitter to keep the points from overlapping.
# Mean bars per class with the raw observations jittered horizontally only
# (height = 0 leaves the y values untouched).
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(data = mpg, mapping = aes(x = class, y = cty)) +
  stat_summary(geom = "bar", fun = "mean") +
  geom_jitter(
    mapping = aes(colour = class),
    width = .4,
    height = .0,
    show.legend = FALSE
  )

# Mean bars filled by model year, with mean_se summaries overlaid.
# First variant: no position adjustment is specified.
ggplot(
  data = mpg,
  mapping = aes(x = class, y = cty, fill = factor(year))
) +
  stat_summary(geom = "bar", fun = "mean") +
  stat_summary(fun.data = "mean_se")

# Middle figure: position = "stack"
# Mean bars filled by model year, drawn through geom_bar() with the summary
# stat, plus mean_se summaries.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(
  data = mpg,
  mapping = aes(x = class, y = cty, fill = factor(year))
) +
  geom_bar(stat = "summary", fun = "mean") +
  stat_summary(fun.data = "mean_se")

# Right figure: position = "dodge"
# Mean bars filled by model year, placed side by side; the mean_se summaries
# are dodged by width = 0.9 as well.
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0).
ggplot(
  data = mpg,
  mapping = aes(x = class, y = cty, fill = factor(year))
) +
  stat_summary(geom = "bar", fun = "mean", position = position_dodge()) +
  stat_summary(fun.data = "mean_se", position = position_dodge(width = 0.9))

# Use coord_cartesian().
# Full-range scatter plot of cty against displ with reference lines at
# x = 4 and y = 15 and a linear fit without its confidence band.
mpg %>%
  ggplot(aes(x = displ, y = cty)) +
  geom_point() +
  geom_vline(xintercept = 4) +
  geom_hline(yintercept = 15) +
  geom_smooth(method = "lm", se = FALSE)

# Right figure: zooming in on a specific range of the x and y axes
# Same plot, restricted to x in [1.5, 4.5] and y in [10, 35] through
# coord_cartesian().
ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  geom_point() +
  coord_cartesian(xlim = c(1.5, 4.5), ylim = c(10, 35)) +
  geom_vline(xintercept = 4) +
  geom_hline(yintercept = 15) +
  geom_smooth(method = "lm", se = FALSE)

# Appearance can be tuned through theme(). A practical way to learn how to use
# it is to inspect the body of theme_bw() and the result it produces.
# theme as a function: evaluating the bare name at top level shows the
# definition of theme_bw().
# The `##` lines below appear to be the output captured from one session
# (the bytecode address is specific to that session).
theme_bw
## function (base_size = 11, base_family = "")
## {
## theme_grey(base_size = base_size, base_family = base_family) %+replace%
## theme(panel.background = element_rect(fill = "white",
## colour = NA), panel.border = element_rect(fill = NA,
## colour = "grey20"), panel.grid.major = element_line(colour = "grey92"),
## panel.grid.minor = element_line(colour = "grey92",
## size = 0.25), strip.background = element_rect(fill = "grey85",
## colour = "grey20"), legend.key = element_rect(fill = "white",
## colour = NA), complete = TRUE)
## }
## <bytecode: 0x000000001cfb0100>
## <environment: namespace:ggplot2>
# Box plot of cty per drv with a manually chosen fill colour for each drv
# level ("4", "f", "r").
ggplot(data = mpg, mapping = aes(x = drv, y = cty, fill = drv)) +
  geom_boxplot() +
  scale_fill_manual(values = c("4" = "black", "f" = "grey", "r" = "#ffffff"))

# Scatter plot coloured by cylinder count, with title, subtitle, caption,
# axis titles and legend title set through labs() and a larger base font.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = cty, group = factor(cyl), colour = factor(cyl))
) +
  geom_point() +
  labs(
    title = "エンジンの大きさと市街地における燃費の関係",
    subtitle = "1999年と2008年のデータを用いて",
    caption = "出典:xxx",
    x = "エンジンの大きさ(L)",
    y = "市街地における燃費(mpg)",
    colour = "シリンダー数"
  ) +
  theme_light(base_size = 18)

# ggplot and Plotly settings can be adjusted through a GUI, which is very
# convenient. Alternatively, the RStudio add-in ggThemeAssist can reportedly
# be used.
# Open the ggplotgui interface on the mpg data.
library(ggplotgui)
ggplot_shiny(mpg)

# A ggplot object can also be handed over to launch Plotly.
# Attaching plotly masks ggplot2::last_plot, stats::filter and
# graphics::layout.
library(plotly)

# Violin plot of displ per class with semi-transparent black jittered points,
# stored in `g` and then converted to an interactive Plotly figure.
g <- mpg %>%
  ggplot(aes(x = class, y = displ, colour = class)) +
  theme_bw() +
  geom_violin() +
  geom_jitter(size = 1, alpha = 0.5, width = 0.25, colour = "black")
ggplotly(g)
# Message printed by ggplotly() in the recorded session:
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
# Loading ggpubr attaches magrittr, which masks purrr::set_names and
# tidyr::extract.
library(ggpubr)

# Two scatter plots against displ (cty and hwy), arranged side by side and
# labelled.
g1 <- ggplot(data = mpg, mapping = aes(x = displ, y = cty)) +
  theme_classic() +
  geom_point(colour = "seagreen")
g2 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  theme_classic() +
  geom_point(colour = "lightskyblue")
ggarrange(g1, g2, labels = c("市街地", "高速道路"), ncol = 2, hjust = -1.5)

library(ggthemes)
# Box plot of cty per class, filled by class through scale_fill_colorblind(),
# with the legend shown explicitly.
mpg %>%
  ggplot(aes(x = class, y = cty, fill = class)) +
  geom_boxplot(show.legend = TRUE) +
  scale_fill_colorblind()